{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 广州美食"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导入库\n",
    "import pandas as pd \n",
    "import warnings # 忽略打印的警告\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>名称</th>\n",
       "      <th>评论数</th>\n",
       "      <th>人均价格</th>\n",
       "      <th>类别</th>\n",
       "      <th>商圈</th>\n",
       "      <th>地址</th>\n",
       "      <th>推荐1</th>\n",
       "      <th>推荐2</th>\n",
       "      <th>推荐3</th>\n",
       "      <th>口味评分</th>\n",
       "      <th>环境评分</th>\n",
       "      <th>服务评分</th>\n",
       "      <th>星级</th>\n",
       "      <th>店铺ID</th>\n",
       "      <th>网址</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>极炙·台灣精致炭火烤肉</td>\n",
       "      <td>3551.0</td>\n",
       "      <td>211元</td>\n",
       "      <td>日本料理</td>\n",
       "      <td>天河城/体育中心</td>\n",
       "      <td>天河路178-188号新天河宾馆院内综合楼3楼</td>\n",
       "      <td>极上和牛套餐</td>\n",
       "      <td>特选牛舌</td>\n",
       "      <td>元贝</td>\n",
       "      <td>9.1</td>\n",
       "      <td>8.5</td>\n",
       "      <td>9.2</td>\n",
       "      <td>五星商户</td>\n",
       "      <td>66250176</td>\n",
       "      <td>http://www.dianping.com/shop/66250176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>大滷爺(正佳店)</td>\n",
       "      <td>151.0</td>\n",
       "      <td>54元</td>\n",
       "      <td>粤菜</td>\n",
       "      <td>天河城/体育中心</td>\n",
       "      <td>体育东路正佳广场五楼广正街</td>\n",
       "      <td>滷爺鹅肉饭</td>\n",
       "      <td>生蚝沙锅粥</td>\n",
       "      <td>卤水鹅肝饭</td>\n",
       "      <td>9.3</td>\n",
       "      <td>9.1</td>\n",
       "      <td>9.1</td>\n",
       "      <td>五星商户</td>\n",
       "      <td>id110266597</td>\n",
       "      <td>http://www.dianping.com/shop/110266597</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>白天鹅宾馆·玉堂春暖餐厅</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>312元</td>\n",
       "      <td>粤菜</td>\n",
       "      <td>沙面</td>\n",
       "      <td>沙面南街1号白天鹅宾馆3楼</td>\n",
       "      <td>沙琪玛</td>\n",
       "      <td>葵花鸡</td>\n",
       "      <td>招牌虾饺</td>\n",
       "      <td>8.9</td>\n",
       "      <td>9.3</td>\n",
       "      <td>9.0</td>\n",
       "      <td>五星商户</td>\n",
       "      <td>id520094</td>\n",
       "      <td>http://www.dianping.com/shop/520094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Mr.Fish鱼鲜生海鲜放题(高德置地冬广场店)</td>\n",
       "      <td>7703.0</td>\n",
       "      <td>354元</td>\n",
       "      <td>自助餐</td>\n",
       "      <td>高德置地/花城汇</td>\n",
       "      <td>珠江新城花城大道85号高德置地冬广场5楼507-512铺</td>\n",
       "      <td>刺身新鲜</td>\n",
       "      <td>新西兰鳌虾</td>\n",
       "      <td>燕窝哈根达斯雪糕</td>\n",
       "      <td>8.8</td>\n",
       "      <td>9.0</td>\n",
       "      <td>8.7</td>\n",
       "      <td>准五星商户</td>\n",
       "      <td>id32501719</td>\n",
       "      <td>http://www.dianping.com/shop/32501719</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>漫活堂·健康有机西餐厅(高志店)</td>\n",
       "      <td>1174.0</td>\n",
       "      <td>148元</td>\n",
       "      <td>西餐</td>\n",
       "      <td>兴盛路/跑马场</td>\n",
       "      <td>黄埔大道西120号高志大厦首层107铺</td>\n",
       "      <td>低温慢煮牛小排</td>\n",
       "      <td>青苹果焦糖核桃沙拉</td>\n",
       "      <td>鲜虾墨鱼汁意大利面配参巴海鲜酱</td>\n",
       "      <td>9.1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>五星商户</td>\n",
       "      <td>id76972044</td>\n",
       "      <td>http://www.dianping.com/shop/76972044</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         名称     评论数  人均价格    类别        商圈  \\\n",
       "0               极炙·台灣精致炭火烤肉  3551.0  211元  日本料理  天河城/体育中心   \n",
       "1                  大滷爺(正佳店)   151.0   54元    粤菜  天河城/体育中心   \n",
       "2              白天鹅宾馆·玉堂春暖餐厅  2018.0  312元    粤菜        沙面   \n",
       "3  Mr.Fish鱼鲜生海鲜放题(高德置地冬广场店)  7703.0  354元   自助餐  高德置地/花城汇   \n",
       "4          漫活堂·健康有机西餐厅(高志店)  1174.0  148元    西餐   兴盛路/跑马场   \n",
       "\n",
       "                             地址      推荐1        推荐2              推荐3  口味评分  \\\n",
       "0       天河路178-188号新天河宾馆院内综合楼3楼   极上和牛套餐       特选牛舌               元贝   9.1   \n",
       "1                 体育东路正佳广场五楼广正街    滷爺鹅肉饭      生蚝沙锅粥            卤水鹅肝饭   9.3   \n",
       "2                 沙面南街1号白天鹅宾馆3楼      沙琪玛        葵花鸡             招牌虾饺   8.9   \n",
       "3  珠江新城花城大道85号高德置地冬广场5楼507-512铺     刺身新鲜      新西兰鳌虾         燕窝哈根达斯雪糕   8.8   \n",
       "4           黄埔大道西120号高志大厦首层107铺  低温慢煮牛小排  青苹果焦糖核桃沙拉  鲜虾墨鱼汁意大利面配参巴海鲜酱   9.1   \n",
       "\n",
       "   环境评分  服务评分     星级         店铺ID                                      网址  \n",
       "0   8.5   9.2   五星商户     66250176   http://www.dianping.com/shop/66250176  \n",
       "1   9.1   9.1   五星商户  id110266597  http://www.dianping.com/shop/110266597  \n",
       "2   9.3   9.0   五星商户     id520094     http://www.dianping.com/shop/520094  \n",
       "3   9.0   8.7  准五星商户   id32501719   http://www.dianping.com/shop/32501719  \n",
       "4   9.0   9.0   五星商户   id76972044   http://www.dianping.com/shop/76972044  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>店铺名称</th>\n",
       "      <th>评论数</th>\n",
       "      <th>人均价格</th>\n",
       "      <th>菜系</th>\n",
       "      <th>商圈</th>\n",
       "      <th>地址</th>\n",
       "      <th>推荐1</th>\n",
       "      <th>推荐2</th>\n",
       "      <th>推荐3</th>\n",
       "      <th>口味评分</th>\n",
       "      <th>环境评分</th>\n",
       "      <th>服务评分</th>\n",
       "      <th>星级</th>\n",
       "      <th>店铺ID</th>\n",
       "      <th>网址</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>稻荷料理亭（刺身专家）(杨箕店)</td>\n",
       "      <td>182</td>\n",
       "      <td>252</td>\n",
       "      <td>日本料理</td>\n",
       "      <td>东风东/杨箕</td>\n",
       "      <td>广州大道中富力东山新天地杨箕牌坊直入100米右侧</td>\n",
       "      <td>刺身拼盆</td>\n",
       "      <td>牛舌柚子胡椒卷</td>\n",
       "      <td>抹茶布丁</td>\n",
       "      <td>8.7</td>\n",
       "      <td>9.2</td>\n",
       "      <td>8.9</td>\n",
       "      <td>准五星商户</td>\n",
       "      <td>id92893759</td>\n",
       "      <td>http://www.dianping.com/shop/92893759</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>车库美式餐厅●不一样的汉堡(天汇igc分店)</td>\n",
       "      <td>128</td>\n",
       "      <td>50</td>\n",
       "      <td>西餐</td>\n",
       "      <td>珠江新城</td>\n",
       "      <td>天汇广场四楼foodsquare</td>\n",
       "      <td>经典双芝士牛肉堡</td>\n",
       "      <td>Mini汉堡</td>\n",
       "      <td>蔬菜浓汤</td>\n",
       "      <td>8.2</td>\n",
       "      <td>8.2</td>\n",
       "      <td>7.9</td>\n",
       "      <td>四星商户</td>\n",
       "      <td>id93691011</td>\n",
       "      <td>http://www.dianping.com/shop/93691011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>爷门串串(江南西店)</td>\n",
       "      <td>478</td>\n",
       "      <td>63</td>\n",
       "      <td>火锅</td>\n",
       "      <td>江南西</td>\n",
       "      <td>江南西路32号之2</td>\n",
       "      <td>销魂鸡丝凉面</td>\n",
       "      <td>虎虾串串</td>\n",
       "      <td>成都老酸奶</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.3</td>\n",
       "      <td>8.0</td>\n",
       "      <td>四星商户</td>\n",
       "      <td>id96089046</td>\n",
       "      <td>http://www.dianping.com/shop/96089046</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>你好！蓉晓姐串串香火锅</td>\n",
       "      <td>487</td>\n",
       "      <td>81</td>\n",
       "      <td>火锅</td>\n",
       "      <td>客村/赤岗</td>\n",
       "      <td>新港中路388号</td>\n",
       "      <td>滋补鸳鸯锅</td>\n",
       "      <td>竹签</td>\n",
       "      <td>虾滑</td>\n",
       "      <td>8.3</td>\n",
       "      <td>8.4</td>\n",
       "      <td>8.6</td>\n",
       "      <td>四星商户</td>\n",
       "      <td>id95025317</td>\n",
       "      <td>http://www.dianping.com/shop/95025317</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>绿岛西餐Park Cafe(兰圃公园店)</td>\n",
       "      <td>3437</td>\n",
       "      <td>106</td>\n",
       "      <td>西餐</td>\n",
       "      <td>越秀公园</td>\n",
       "      <td>解放北路901号</td>\n",
       "      <td>雪花安格斯牛仔骨</td>\n",
       "      <td>蘑菇野菌忌廉汤</td>\n",
       "      <td>果木烟熏特级去骨牛小排</td>\n",
       "      <td>8.1</td>\n",
       "      <td>9.1</td>\n",
       "      <td>8.2</td>\n",
       "      <td>准五星商户</td>\n",
       "      <td>id18004914</td>\n",
       "      <td>http://www.dianping.com/shop/18004914</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     店铺名称   评论数  人均价格    菜系      商圈                        地址  \\\n",
       "0        稻荷料理亭（刺身专家）(杨箕店)   182   252  日本料理  东风东/杨箕  广州大道中富力东山新天地杨箕牌坊直入100米右侧   \n",
       "1  车库美式餐厅●不一样的汉堡(天汇igc分店)   128    50    西餐    珠江新城          天汇广场四楼foodsquare   \n",
       "2              爷门串串(江南西店)   478    63    火锅     江南西                 江南西路32号之2   \n",
       "3             你好！蓉晓姐串串香火锅   487    81    火锅   客村/赤岗                  新港中路388号   \n",
       "4    绿岛西餐Park Cafe(兰圃公园店)  3437   106    西餐    越秀公园                  解放北路901号   \n",
       "\n",
       "        推荐1      推荐2          推荐3  口味评分  环境评分  服务评分     星级        店铺ID  \\\n",
       "0      刺身拼盆  牛舌柚子胡椒卷         抹茶布丁   8.7   9.2   8.9  准五星商户  id92893759   \n",
       "1  经典双芝士牛肉堡   Mini汉堡         蔬菜浓汤   8.2   8.2   7.9   四星商户  id93691011   \n",
       "2    销魂鸡丝凉面     虎虾串串        成都老酸奶   8.0   8.3   8.0   四星商户  id96089046   \n",
       "3     滋补鸳鸯锅       竹签           虾滑   8.3   8.4   8.6   四星商户  id95025317   \n",
       "4  雪花安格斯牛仔骨  蘑菇野菌忌廉汤  果木烟熏特级去骨牛小排   8.1   9.1   8.2  准五星商户  id18004914   \n",
       "\n",
       "                                      网址  \n",
       "0  http://www.dianping.com/shop/92893759  \n",
       "1  http://www.dianping.com/shop/93691011  \n",
       "2  http://www.dianping.com/shop/96089046  \n",
       "3  http://www.dianping.com/shop/95025317  \n",
       "4  http://www.dianping.com/shop/18004914  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>店铺ID</th>\n",
       "      <th>公共交通</th>\n",
       "      <th>纬度</th>\n",
       "      <th>经度</th>\n",
       "      <th>行政区名称</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>id10002222</td>\n",
       "      <td>NaN</td>\n",
       "      <td>23.081253</td>\n",
       "      <td>113.328349</td>\n",
       "      <td>海珠区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>id100412754</td>\n",
       "      <td>NaN</td>\n",
       "      <td>23.095611</td>\n",
       "      <td>113.272257</td>\n",
       "      <td>海珠区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>id101488702</td>\n",
       "      <td>NaN</td>\n",
       "      <td>23.094872</td>\n",
       "      <td>113.272726</td>\n",
       "      <td>海珠区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>id101520850</td>\n",
       "      <td>NaN</td>\n",
       "      <td>23.127921</td>\n",
       "      <td>113.253285</td>\n",
       "      <td>荔湾区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>id101937385</td>\n",
       "      <td>NaN</td>\n",
       "      <td>23.118147</td>\n",
       "      <td>113.326413</td>\n",
       "      <td>天河区</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          店铺ID 公共交通         纬度          经度 行政区名称\n",
       "0   id10002222  NaN  23.081253  113.328349   海珠区\n",
       "1  id100412754  NaN  23.095611  113.272257   海珠区\n",
       "2  id101488702  NaN  23.094872  113.272726   海珠区\n",
       "3  id101520850  NaN  23.127921  113.253285   荔湾区\n",
       "4  id101937385  NaN  23.118147  113.326413   天河区"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Load the site A listing, the site B listing and the shop-location table.\n",
    "# All three CSV files are read with the GBK codec.\n",
    "a_food, b_food, local = (\n",
    "    pd.read_csv(csv_name, encoding='gbk')\n",
    "    for csv_name in ('A网站美食数据.csv', 'B网站美食数据.csv', '店铺地图信息.csv')\n",
    ")\n",
    "\n",
    "# Preview the first five rows of each table.\n",
    "display(a_food.head(), b_food.head(), local.head())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据清洗"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>店铺名称</th>\n",
       "      <th>评论数</th>\n",
       "      <th>人均价格</th>\n",
       "      <th>菜系</th>\n",
       "      <th>商圈</th>\n",
       "      <th>地址</th>\n",
       "      <th>推荐1</th>\n",
       "      <th>推荐2</th>\n",
       "      <th>推荐3</th>\n",
       "      <th>口味评分</th>\n",
       "      <th>环境评分</th>\n",
       "      <th>服务评分</th>\n",
       "      <th>星级</th>\n",
       "      <th>店铺ID</th>\n",
       "      <th>网址</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>极炙·台灣精致炭火烤肉</td>\n",
       "      <td>3551.0</td>\n",
       "      <td>211</td>\n",
       "      <td>日本料理</td>\n",
       "      <td>天河城/体育中心</td>\n",
       "      <td>天河路178-188号新天河宾馆院内综合楼3楼</td>\n",
       "      <td>极上和牛套餐</td>\n",
       "      <td>特选牛舌</td>\n",
       "      <td>元贝</td>\n",
       "      <td>9.1</td>\n",
       "      <td>8.5</td>\n",
       "      <td>9.2</td>\n",
       "      <td>五星商户</td>\n",
       "      <td>66250176</td>\n",
       "      <td>http://www.dianping.com/shop/66250176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>大滷爺(正佳店)</td>\n",
       "      <td>151.0</td>\n",
       "      <td>54</td>\n",
       "      <td>粤菜</td>\n",
       "      <td>天河城/体育中心</td>\n",
       "      <td>体育东路正佳广场五楼广正街</td>\n",
       "      <td>滷爺鹅肉饭</td>\n",
       "      <td>生蚝沙锅粥</td>\n",
       "      <td>卤水鹅肝饭</td>\n",
       "      <td>9.3</td>\n",
       "      <td>9.1</td>\n",
       "      <td>9.1</td>\n",
       "      <td>五星商户</td>\n",
       "      <td>id110266597</td>\n",
       "      <td>http://www.dianping.com/shop/110266597</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>白天鹅宾馆·玉堂春暖餐厅</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>312</td>\n",
       "      <td>粤菜</td>\n",
       "      <td>沙面</td>\n",
       "      <td>沙面南街1号白天鹅宾馆3楼</td>\n",
       "      <td>沙琪玛</td>\n",
       "      <td>葵花鸡</td>\n",
       "      <td>招牌虾饺</td>\n",
       "      <td>8.9</td>\n",
       "      <td>9.3</td>\n",
       "      <td>9.0</td>\n",
       "      <td>五星商户</td>\n",
       "      <td>id520094</td>\n",
       "      <td>http://www.dianping.com/shop/520094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Mr.Fish鱼鲜生海鲜放题(高德置地冬广场店)</td>\n",
       "      <td>7703.0</td>\n",
       "      <td>354</td>\n",
       "      <td>自助餐</td>\n",
       "      <td>高德置地/花城汇</td>\n",
       "      <td>珠江新城花城大道85号高德置地冬广场5楼507-512铺</td>\n",
       "      <td>刺身新鲜</td>\n",
       "      <td>新西兰鳌虾</td>\n",
       "      <td>燕窝哈根达斯雪糕</td>\n",
       "      <td>8.8</td>\n",
       "      <td>9.0</td>\n",
       "      <td>8.7</td>\n",
       "      <td>准五星商户</td>\n",
       "      <td>id32501719</td>\n",
       "      <td>http://www.dianping.com/shop/32501719</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>漫活堂·健康有机西餐厅(高志店)</td>\n",
       "      <td>1174.0</td>\n",
       "      <td>148</td>\n",
       "      <td>西餐</td>\n",
       "      <td>兴盛路/跑马场</td>\n",
       "      <td>黄埔大道西120号高志大厦首层107铺</td>\n",
       "      <td>低温慢煮牛小排</td>\n",
       "      <td>青苹果焦糖核桃沙拉</td>\n",
       "      <td>鲜虾墨鱼汁意大利面配参巴海鲜酱</td>\n",
       "      <td>9.1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>五星商户</td>\n",
       "      <td>id76972044</td>\n",
       "      <td>http://www.dianping.com/shop/76972044</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       店铺名称     评论数 人均价格    菜系        商圈  \\\n",
       "0               极炙·台灣精致炭火烤肉  3551.0  211  日本料理  天河城/体育中心   \n",
       "1                  大滷爺(正佳店)   151.0   54    粤菜  天河城/体育中心   \n",
       "2              白天鹅宾馆·玉堂春暖餐厅  2018.0  312    粤菜        沙面   \n",
       "3  Mr.Fish鱼鲜生海鲜放题(高德置地冬广场店)  7703.0  354   自助餐  高德置地/花城汇   \n",
       "4          漫活堂·健康有机西餐厅(高志店)  1174.0  148    西餐   兴盛路/跑马场   \n",
       "\n",
       "                             地址      推荐1        推荐2              推荐3  口味评分  \\\n",
       "0       天河路178-188号新天河宾馆院内综合楼3楼   极上和牛套餐       特选牛舌               元贝   9.1   \n",
       "1                 体育东路正佳广场五楼广正街    滷爺鹅肉饭      生蚝沙锅粥            卤水鹅肝饭   9.3   \n",
       "2                 沙面南街1号白天鹅宾馆3楼      沙琪玛        葵花鸡             招牌虾饺   8.9   \n",
       "3  珠江新城花城大道85号高德置地冬广场5楼507-512铺     刺身新鲜      新西兰鳌虾         燕窝哈根达斯雪糕   8.8   \n",
       "4           黄埔大道西120号高志大厦首层107铺  低温慢煮牛小排  青苹果焦糖核桃沙拉  鲜虾墨鱼汁意大利面配参巴海鲜酱   9.1   \n",
       "\n",
       "   环境评分  服务评分     星级         店铺ID                                      网址  \n",
       "0   8.5   9.2   五星商户     66250176   http://www.dianping.com/shop/66250176  \n",
       "1   9.1   9.1   五星商户  id110266597  http://www.dianping.com/shop/110266597  \n",
       "2   9.3   9.0   五星商户     id520094     http://www.dianping.com/shop/520094  \n",
       "3   9.0   8.7  准五星商户   id32501719   http://www.dianping.com/shop/32501719  \n",
       "4   9.0   9.0   五星商户   id76972044   http://www.dianping.com/shop/76972044  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Bring site A's schema in line with site B's before the two tables are stacked:\n",
    "#   - column 名称 becomes 店铺名称 and column 类别 becomes 菜系\n",
    "#   - the 元 unit is removed from 人均价格 (non-missing values remain strings)\n",
    "column_aliases = {'名称': '店铺名称', '类别': '菜系'}\n",
    "a_food.rename(columns=column_aliases, inplace=True)\n",
    "price_without_unit = a_food['人均价格'].str.replace('元', '')\n",
    "a_food['人均价格'] = price_without_unit\n",
    "a_food.head(5)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(300, 15)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "(74, 15)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "(374, 17)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Stack the listings from both sites into one table and drop exact duplicate rows.\n",
    "display(a_food.shape)\n",
    "display(b_food.shape)\n",
    "\n",
    "# Both tables must expose the same columns. If site A still has its original\n",
    "# 名称 / 类别 headers, concat() silently produces extra, half-empty columns.\n",
    "assert set(a_food.columns) == set(b_food.columns), (\n",
    "    'a_food and b_food have different columns; rename the site A columns before merging'\n",
    ")\n",
    "\n",
    "food = pd.concat([a_food, b_food], ignore_index=True)\n",
    "\n",
    "# Site A's 人均价格 holds text (what is left after the 元 unit is stripped), so\n",
    "# convert the stacked column to numbers before de-duplicating; otherwise a price\n",
    "# stored as text never compares equal to the same price stored as a number.\n",
    "# Values that cannot be parsed become NaN.\n",
    "food['人均价格'] = pd.to_numeric(food['人均价格'], errors='coerce')\n",
    "\n",
    "food = food.drop_duplicates()\n",
    "display(food.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(372, 21)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Attach the map data in `local` to each shop with an inner join on 店铺ID;\n",
    "# shops whose ID has no match in `local` are dropped.\n",
    "# NOTE(review): the site A preview shows one 店铺ID without the 'id' prefix (66250176)\n",
    "# while the IDs previewed from `local` all start with 'id' - such rows presumably\n",
    "# fail to match here; confirm whether the IDs should be normalised first.\n",
    "food1 = food.merge(local, how='inner', on='店铺ID')\n",
    "food1.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 选择需要的列，查看缺失值，类型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 372 entries, 0 to 371\n",
      "Data columns (total 15 columns):\n",
      " #   Column  Non-Null Count  Dtype  \n",
      "---  ------  --------------  -----  \n",
      " 0   店铺名称    74 non-null     object \n",
      " 1   评论数     372 non-null    float64\n",
      " 2   人均价格    372 non-null    object \n",
      " 3   菜系      74 non-null     object \n",
      " 4   商圈      372 non-null    object \n",
      " 5   推荐1     372 non-null    object \n",
      " 6   推荐2     372 non-null    object \n",
      " 7   推荐3     372 non-null    object \n",
      " 8   口味评分    372 non-null    float64\n",
      " 9   环境评分    372 non-null    float64\n",
      " 10  服务评分    372 non-null    float64\n",
      " 11  星级      372 non-null    object \n",
      " 12  纬度      372 non-null    float64\n",
      " 13  经度      372 non-null    float64\n",
      " 14  行政区名称   372 non-null    object \n",
      "dtypes: float64(6), object(9)\n",
      "memory usage: 46.5+ KB\n"
     ]
    }
   ],
   "source": [
    "# Keep only the columns used by the analysis, then report dtypes and non-null counts.\n",
    "analysis_columns = [\n",
    "    '店铺名称', '评论数', '人均价格', '菜系', '商圈',\n",
    "    '推荐1', '推荐2', '推荐3',\n",
    "    '口味评分', '环境评分', '服务评分', '星级',\n",
    "    '纬度', '经度', '行政区名称',\n",
    "]\n",
    "df = food1[analysis_columns]\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据可视化代码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyecharts.charts import Geo\n",
    "from pyecharts import options as opts\n",
    "from pyecharts.globals import GeoType\n",
    "from pyecharts.globals import ThemeType\n",
    "\n",
    "def geo():\n",
    "    \"\"\"Draw every shop in the global ``df`` as an effect-scatter point on a map of Guangzhou.\n",
    "\n",
    "    Writes the chart to '广州美食分布.html' and returns the Geo chart object.\n",
    "    \"\"\"\n",
    "    chart = Geo(init_opts=opts.InitOpts(theme=ThemeType.PURPLE_PASSION))\n",
    "    chart.add_schema(maptype='广州')\n",
    "\n",
    "    # Register each shop name as a custom coordinate: add_coordinate(name, lng, lat).\n",
    "    shop_names = list(df['店铺名称'])\n",
    "    for shop, longitude, latitude in zip(shop_names, df['经度'], df['纬度']):\n",
    "        chart.add_coordinate(shop, longitude, latitude)\n",
    "\n",
    "    # Build the (name, value) pairs; every shop is given the same value, 50.\n",
    "    points = [(shop, 50) for shop in shop_names]\n",
    "\n",
    "    # Plot the pairs as a single unnamed series.\n",
    "    chart.add('', points, type_=GeoType.EFFECT_SCATTER, symbol_size=3)\n",
    "\n",
    "    # Styling: no point labels, hidden visual map, chart title.\n",
    "    chart.set_series_opts(label_opts=opts.LabelOpts(is_show=False))\n",
    "    chart.set_global_opts(\n",
    "        visualmap_opts=opts.VisualMapOpts(is_show=False),\n",
    "        title_opts=opts.TitleOpts(title=\"广州美食分布\"),\n",
    "    )\n",
    "    chart.render('广州美食分布.html')\n",
    "    return chart"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 绘制饼图\n",
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Pie\n",
    "from pyecharts.globals import ThemeType\n",
    "\n",
    "def pie(x=None, y=None):\n",
    "    \"\"\"Render a pie chart of the number of shops per administrative district.\n",
    "\n",
    "    Args:\n",
    "        x: District names. Defaults to the hard-coded snapshot of seven districts.\n",
    "        y: Shop count for each district, in the same order as ``x``. Defaults to\n",
    "            the hard-coded snapshot of counts. To chart live data, pass plain\n",
    "            lists, e.g. ``counts.index.tolist()`` and ``counts.tolist()`` for\n",
    "            ``counts = df['行政区名称'].value_counts()``.\n",
    "\n",
    "    Returns:\n",
    "        The Pie chart; it is also written to '行政区美食占比.html'.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``x`` and ``y`` differ in length.\n",
    "    \"\"\"\n",
    "    if x is None:\n",
    "        x = ['天河区', '越秀区', '海珠区', '荔湾区', '番禺区', '白云区', '黄埔区']\n",
    "    if y is None:\n",
    "        y = [164, 69, 68, 27, 23, 20, 1]\n",
    "    x, y = list(x), list(y)\n",
    "    # zip() would silently truncate to the shorter input, hiding a mismatch.\n",
    "    if len(x) != len(y):\n",
    "        raise ValueError('x and y must have the same length')\n",
    "\n",
    "    c = (\n",
    "        Pie(init_opts=opts.InitOpts(theme=ThemeType.PURPLE_PASSION))\n",
    "        .add(\"\", [list(z) for z in zip(x, y)])\n",
    "        .set_global_opts(title_opts=opts.TitleOpts(title=\"行政区美食占比\"))\n",
    "        # Label each slice as \"<name>: <value>\".\n",
    "        .set_series_opts(label_opts=opts.LabelOpts(formatter=\"{b}: {c}\"))\n",
    "    )\n",
    "    c.render('行政区美食占比.html')\n",
    "    return c\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "珠江新城         60\n",
       "天河城/体育中心     42\n",
       "北京路          34\n",
       "江南西          23\n",
       "天河北          22\n",
       "江南大道         10\n",
       "兴盛路/跑马场      10\n",
       "高德置地/花城汇     10\n",
       "机场路          10\n",
       "石牌/龙口        10\n",
       "中山七八路         9\n",
       "工业大道          9\n",
       "环市东           9\n",
       "南村            8\n",
       "客村/赤岗         8\n",
       "滨江路           7\n",
       "琶洲            7\n",
       "芳村            6\n",
       "中山二三路/东山口     6\n",
       "小北/淘金         5\n",
       "沙面            5\n",
       "车陂/东圃         4\n",
       "五羊新城          4\n",
       "东风东/杨箕        4\n",
       "同和/京溪         3\n",
       "市桥            3\n",
       "越秀公园          3\n",
       "新港西路          3\n",
       "上下九           3\n",
       "白云绿地中心        3\n",
       "万达广场          2\n",
       "长隆            2\n",
       "天河公园          2\n",
       "洛溪            2\n",
       "天河区           2\n",
       "番禺广场          2\n",
       "荔湾区           2\n",
       "萝岗万达广场        1\n",
       "钟村            1\n",
       "大石            1\n",
       "东晓南           1\n",
       "科学城           1\n",
       "西村/西场         1\n",
       "五号停机坪         1\n",
       "黄石            1\n",
       "沿江路/二沙岛       1\n",
       "时尚天河          1\n",
       "南浦            1\n",
       "厦滘            1\n",
       "番禺区           1\n",
       "康王路           1\n",
       "区庄/动物园        1\n",
       "天河客运站         1\n",
       "桥南            1\n",
       "海珠广场          1\n",
       "Name: 商圈, dtype: int64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 商圈分析\n",
    "bussiness_area = df['商圈'].value_counts()\n",
    "bussiness_area"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Bar\n",
    "from pyecharts.globals import ThemeType\n",
    "\n",
    "def bar(x=None, y=None):\n",
    "    \"\"\"Render a bar chart of the number of shops per business district.\n",
    "\n",
    "    Args:\n",
    "        x: Business-district names for the x axis. Defaults to the hard-coded\n",
    "            snapshot of ten districts.\n",
    "        y: Shop count for each district, in the same order as ``x``. Defaults to\n",
    "            the hard-coded snapshot of counts. To chart live data, pass plain\n",
    "            lists, e.g. ``top.index.tolist()`` and ``top.tolist()`` for\n",
    "            ``top = df['商圈'].value_counts().head(10)``.\n",
    "\n",
    "    Returns:\n",
    "        The Bar chart; it is also written to '商圈分布.html'.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``x`` and ``y`` differ in length.\n",
    "    \"\"\"\n",
    "    if x is None:\n",
    "        x = ['珠江新城','天河城/体育中心','北京路','江南西','天河北','机场路',\n",
    "             '江南大道','高德置地/花城汇','石牌/龙口','兴盛路/跑马场']\n",
    "    if y is None:\n",
    "        y = [60, 42, 34, 23, 22, 10, 10, 10, 10, 10]\n",
    "    x, y = list(x), list(y)\n",
    "    # Categories and values are added separately, so check they line up.\n",
    "    if len(x) != len(y):\n",
    "        raise ValueError('x and y must have the same length')\n",
    "\n",
    "    c = (\n",
    "        Bar(init_opts=opts.InitOpts(theme=ThemeType.PURPLE_PASSION))\n",
    "        .add_xaxis(x)\n",
    "        .add_yaxis(\"\",y)\n",
    "        .set_global_opts(\n",
    "            title_opts=opts.TitleOpts(title=\"商圈分布\"),\n",
    "            # Tilt the axis labels; presumably so the long district names do not overlap.\n",
    "            xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)))\n",
    "    )\n",
    "    c.render('商圈分布.html')\n",
    "    return c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "四星商户     175\n",
       "准五星商户    156\n",
       "五星商户      34\n",
       "准四星商户      7\n",
       "Name: 星级, dtype: int64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 星级分析\n",
    "start = df['星级'].value_counts()\n",
    "start"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Pie\n",
    "from pyecharts.globals import ThemeType\n",
    "\n",
    "def pie_circle(x=None, y=None):\n",
    "    \"\"\"Render a ring-shaped pie chart of the number of shops per star rating.\n",
    "\n",
    "    Args:\n",
    "        x: Star-rating names. Defaults to the hard-coded snapshot of four ratings.\n",
    "        y: Shop count for each rating, in the same order as ``x``. Defaults to\n",
    "            the hard-coded snapshot of counts. To chart live data, pass plain\n",
    "            lists, e.g. ``counts.index.tolist()`` and ``counts.tolist()`` for\n",
    "            ``counts = df['星级'].value_counts()``.\n",
    "\n",
    "    Returns:\n",
    "        The Pie chart; it is also written to '星级占比.html'.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``x`` and ``y`` differ in length.\n",
    "    \"\"\"\n",
    "    if x is None:\n",
    "        x = ['四星商户','准五星商户','五星商户','准四星商户']\n",
    "    if y is None:\n",
    "        y = [175,156,34,7]\n",
    "    x, y = list(x), list(y)\n",
    "    # zip() would silently truncate to the shorter input, hiding a mismatch.\n",
    "    if len(x) != len(y):\n",
    "        raise ValueError('x and y must have the same length')\n",
    "\n",
    "    c = (\n",
    "        Pie(init_opts=opts.InitOpts(theme=ThemeType.PURPLE_PASSION))\n",
    "        .add(\n",
    "            \"\",\n",
    "            [list(z) for z in zip(x,y)],\n",
    "            # Inner and outer radius; the gap between them forms the ring.\n",
    "            radius=[\"40%\", \"55%\"],\n",
    "            label_opts=opts.LabelOpts(\n",
    "                position=\"outside\",\n",
    "                # Rich-text label; each {style|text} segment uses a style from `rich` below.\n",
    "                formatter=\"{a|{a}}{abg|}\\n{hr|}\\n {b|{b}: }{c}  {per|{d}%}  \",\n",
    "                background_color=\"#eee\",\n",
    "                border_color=\"#aaa\",\n",
    "                border_width=1,\n",
    "                border_radius=4,\n",
    "                rich={\n",
    "                    \"a\": {\"color\": \"#999\", \"lineHeight\": 22, \"align\": \"center\"},\n",
    "                    \"abg\": {\n",
    "                        \"backgroundColor\": \"#e3e3e3\",\n",
    "                        \"width\": \"100%\",\n",
    "                        \"align\": \"right\",\n",
    "                        \"height\": 22,\n",
    "                        \"borderRadius\": [4, 4, 0, 0],\n",
    "                    },\n",
    "                    \"hr\": {\n",
    "                        \"borderColor\": \"#aaa\",\n",
    "                        \"width\": \"100%\",\n",
    "                        \"borderWidth\": 0.5,\n",
    "                        \"height\": 0,\n",
    "                    },\n",
    "                    \"b\": {\"fontSize\": 16, \"lineHeight\": 33},\n",
    "                    \"per\": {\n",
    "                        \"color\": \"#eee\",\n",
    "                        \"backgroundColor\": \"#334455\",\n",
    "                        \"padding\": [2, 4],\n",
    "                        \"borderRadius\": 2,\n",
    "                    },\n",
    "                },\n",
    "            ),\n",
    "        )\n",
    "        .set_global_opts(title_opts=opts.TitleOpts(title=\"星级占比\"))\n",
    "    )\n",
    "    c.render('星级占比.html')\n",
    "    return c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "火锅      13\n",
       "日本料理    12\n",
       "粤菜      11\n",
       "西餐      11\n",
       "川菜       4\n",
       "自助餐      4\n",
       "东南亚菜     4\n",
       "烧烤       4\n",
       "面包甜点     3\n",
       "海鲜       2\n",
       "咖啡厅      2\n",
       "小龙虾      1\n",
       "湘菜       1\n",
       "快餐简餐     1\n",
       "私房菜      1\n",
       "Name: 菜系, dtype: int64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 美食分析\n",
    "kind = df['菜系'].value_counts()\n",
    "kind"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import Bar\n",
    "from pyecharts.globals import ThemeType\n",
    "\n",
    "def bar_reserve():\n",
    "    # 坐标轴不够，就取前 20\n",
    "    x = ['西餐',\n",
    "     '粤菜',\n",
    "     '日本料理',\n",
    "     '火锅',\n",
    "     '面包甜点',\n",
    "     '自助餐',\n",
    "     '烧烤',\n",
    "     '咖啡厅',\n",
    "     '东南亚菜',\n",
    "     '川菜',\n",
    "     '茶餐厅',\n",
    "     '韩国料理',\n",
    "     '创意菜',\n",
    "     '韩式料理',\n",
    "     '湘菜',\n",
    "     '素菜',\n",
    "     '海鲜',\n",
    "     '小龙虾',\n",
    "     '茶餐馆',\n",
    "     '江浙菜',\n",
    "     '其他美食',\n",
    "     '台湾菜',\n",
    "     '快餐简餐',\n",
    "     '西北菜',\n",
    "     '私房菜',\n",
    "     '新疆菜',\n",
    "     '家常菜',\n",
    "     '粥粉面']\n",
    "    x = x[::-1][8:]\n",
    "    y = [61,\n",
    "     53,\n",
    "     52,\n",
    "     43,\n",
    "     21,\n",
    "     20,\n",
    "     18,\n",
    "     14,\n",
    "     11,\n",
    "     11,\n",
    "     9,\n",
    "     9,\n",
    "     7,\n",
    "     6,\n",
    "     6,\n",
    "     5,\n",
    "     4,\n",
    "     4,\n",
    "     3,\n",
    "     3,\n",
    "     3,\n",
    "     2,\n",
    "     2,\n",
    "     1,\n",
    "     1,\n",
    "     1,\n",
    "     1,\n",
    "     1]\n",
    "    y = y[::-1][8:]\n",
    "\n",
    "    c = (\n",
    "        Bar(init_opts=opts.InitOpts(theme=ThemeType.PURPLE_PASSION))\n",
    "        .add_xaxis(x)\n",
    "        .add_yaxis(\"\", y)\n",
    "        .reversal_axis()\n",
    "        .set_series_opts(label_opts=opts.LabelOpts(position=\"right\"))\n",
    "        .set_global_opts(title_opts=opts.TitleOpts(title=\"美食种类分布\"))\n",
    "    )\n",
    "    c.render('美食种类分布.html')\n",
    "    return c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Dumping model to file cache /var/folders/6q/33ft2qpj4qqdcxdtxzzt3gb40000gn/T/jieba.cache\n",
      "Loading model cost 0.547 seconds.\n",
      "Prefix dict has been built successfully.\n"
     ]
    }
   ],
   "source": [
    "# 将所有推荐美食连接起来\n",
    "w1 = list(df['推荐1'])\n",
    "w2 = list(df['推荐2'])\n",
    "w3 = list(df['推荐3'])\n",
    "w1 = ''.join(w1)\n",
    "w2 = ''.join(w2)\n",
    "w3 = ''.join(w3)\n",
    "txt = w1 + w2 + w3\n",
    "\n",
    "# 分词\n",
    "import jieba\n",
    "ls = jieba.lcut(txt)\n",
    "\n",
    "# 计算词频\n",
    "word = {}\n",
    "for w in ls:\n",
    "    word[w] = word.get(w,0) + 1\n",
    "\n",
    "words = list(word.items())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import WordCloud\n",
    "from pyecharts.globals import SymbolType\n",
    "from pyecharts.globals import ThemeType\n",
    "\n",
    "def worldcloud():\n",
    "    c = (\n",
    "        WordCloud(init_opts=opts.InitOpts(theme=ThemeType.PURPLE_PASSION))\n",
    "        .add(\"\", words, word_size_range=[20, 100], shape=SymbolType.DIAMOND)\n",
    "        .set_global_opts(title_opts=opts.TitleOpts(title=\"\"))\n",
    "    )\n",
    "    c.render('美食词云.html')\n",
    "    return c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
